# Bernd Beber, Philip Roessler, and Alexandra Scacco
# "Intergroup Violence and Political Attitudes: Evidence from a Dividing Sudan", Journal of Politics
# Code to perform kriging estimation and generate measure of indirect riot exposure
# Note that respondent identifiers and locations (id_main, g7_latitude, and g7_longitude) are not included in public data file!

# package to load Stata data
    require(foreign)

# package to change projection
    require(rgdal)

# package to perform kriging
    require(gstat)

# load survey data
    survey <- read.dta("Beber_Roessler_Scacco_JOP_Data.dta")

# function to perform kriging for different measures of riot exposure
# some variable names and geospatial characteristics are hard-coded!
# `z' is the input measure
# `target' specifies whether predicted values should be computed on a grid or at `obs'erved interview locations
# `excl.movers' indicates if respondents who have moved into the neighborhood in 2005 or later are excluded
    riot.krige <- function(z = "riots_direct", target = "obs", excl.movers = TRUE, excl.southerners = TRUE) {

    # extract data on riot exposure and coordinates
        d <- survey[, c("id_main", z, "g7_latitude", "g7_longitude")]
        colnames(d) <- c("id_main", "riot", "g7_latitude", "g7_longitude")

    # leave out respondents who moved in 2005 or later and/or southerners
        if(excl.movers) d <- d[!(survey[, c("a29")]>=0 & survey[, c("a29")]<=5 & !is.na(survey[, c("a29")])), ]
        if(excl.southerners) d <- d[!(survey[, c("south")]==1 & !is.na(survey[, c("south")])), ]

    # remove observations with missing data
        r <- na.omit(d)
        r <- r[r[, "g7_latitude"]!=0 & r[, "g7_longitude"]!=0, ]

    # convert into spatial data frame object
        # conventionally, use y for latitude (North-South), x for longitude (East-West)
        coordinates(r) <- ~ g7_longitude + g7_latitude

        # projection is WGS84
        # for details of EPSG code, see: http://spatialreference.org/ref/epsg/4326/
        # alternatively, specify a proj4 string directly: "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
        proj4string(r) <- CRS("+init=epsg:4326")
        # to see components of SpatialPointsDataFrame object: attributes(r)

    # reproject from geographic into projected coordinate system
        # for reprojection example, see: http://r-sig-geo.2731867.n2.nabble.com/LatLong-to-UTM-td7580117.html
        # project into WGS 84 / UTM Zone 36N
        # for details, see: http://spatialreference.org/ref/epsg/32636/
        u <- spTransform(r, CRS("+init=epsg:32636"))

    # create grid for interpolation
        if(target=="obs") {
            # grid for interpolation consists of all respondent coordinates
                g <- survey[, c("id_main", "g7_latitude", "g7_longitude")]
                g <- na.omit(g)
                g <- g[g[, "g7_latitude"]!=0 & g[, "g7_longitude"]!=0, ]
                coordinates(g) <- ~ g7_longitude + g7_latitude
                proj4string(g) <- CRS("+init=epsg:4326")
                g <- spTransform(g, CRS("+init=epsg:32636"))
        } else {
            # create a grid over data extent for interpolation
            # get data range
                x.range <- as.integer(range(u@coords[,1]))
                y.range <- as.integer(range(u@coords[,2]))
            # expand to grid with 100 meter spacing
                g <- expand.grid(x=seq(from=x.range[1], to=x.range[2], by=100), y=seq(from=y.range[1], to=y.range[2], by=100) )
            # convert to SpatialPixel class
                coordinates(g) <- ~ x + y
                gridded(g) <- TRUE
               proj4string(g) <- CRS("+init=epsg:32636")
        }

    # to visualize sample points and interpolation grid
        # plot(u)
        # points(g, col="red")
        # title("Interpolation and sample points")

    # need a model of spatial variation to perform prediction
    # sample variogram
        v <- variogram(riot ~ 1, data=u)
        # plot(v)

    # fit a model variogram
    # employ commonly used `Sph'erical model, with no spatial correlation beyond `range' meters
    # variability at distances below typical sample spacing are represented by `nugget'
    # commonly set `range' to half of the diagonal of the spatial extent, which is about 19km
        # sqrt(dist(range(u@coords[,1]))^2 + dist(range(u@coords[,2]))^2) / 2
    # longest distance between any two sample points is about 29km
        # max(dist(u@coords))
    # here set range to 12km for improved model fit
    # see also http://www.ecst.csuchico.edu/~juliano/csci693/Presentations/2008w/Materials/Kalkundrikar/DOCS/Variograms.pdf
        fit.v <- fit.variogram(v, vgm(model="Sph", nugget=.2, range=12000))

    # visually evaluate fit
    # declining semivariance at large distances perhaps due to "hole effects"
        if(target=="obs") print(plot(v, model=fit.v, main=paste("Model fit to empirical semivariogram for", z)))

    # create object on which kriging can be performed
    # for details, see e.g. http://casoilresource.lawr.ucdavis.edu/drupal/node/442
    # for non-missing kriging prediction, at least `nmax' observations must be within `maxdist' meters
        k <- gstat(id=z, formula=riot ~ 1, data=u, model=fit.v, nmin=3, maxdist=500)

    # ordinary kriging
    # alternatively to predict.gstat, use `krige'
        p <- predict(k, newdata=g)
    # alternative kriging implementations are available in geoR or kriging libraries:
    # http://rss.acs.unt.edu/Rdoc/library/geoR/html/krige.conv.html
    # http://cran.r-project.org/web/packages/kriging/kriging.pdf

    # return predicted exposure and identifier for interview locations, if applicable
        if(target=="obs") {
            out <- cbind(as.data.frame(g), as.data.frame(p))
            out <- out[, grep("id_main|.pred", colnames(out))]
        } else {
            out <- p
        }
        return(out)
    }

# save variogram and perform kriging
    pdf(file = "Beber_Roessler_Scacco_JOP_Variogram.pdf", width = 8, height = 8)
        riots_indirect <- riot.krige("riots_direct")
    dev.off()

# heat map in figure 1 was created in ArcMap
# in R, could run kriging on grid and plot heat map
    # riots_indirect_grid <- riot.krige("riots_direct", target="grid")
    # image(riots_indirect_grid, col=terrain.colors(10))

# to save output in Stata format
    # colnames(riots_indirect) <- c("id_main", "riots_indirect")
    # write.dta(riots_indirect, "indirect_riot_exposure.dta")
